<?php

/**
 * SAE数据抓取服务
 *
 * @author  zhiyong
 * @version $Id$
 * @package sae
 *
 */

/**
 * SAE数据抓取class
 *
 * SaeFetchurl用于抓取外部数据。支持的协议为http/https。<br />
 * 该类已被废弃，请直接使用curl抓取外部资源
 * @deprecated 该类已被废弃，请直接使用curl抓取外部资源
 *
 * 默认超时时间：
 *  - 连接超时： 5秒
 *  - 发送数据超时： 30秒
 *  - 接收数据超时： 40秒
 *
 * 抓取页面
 * <code>
 * $f = new SaeFetchurl();
 * $content = $f->fetch('http://sina.cn');
 * </code>
 *
 * 发起POST请求
 * <code>
 * $f = new SaeFetchurl();
 * $f->setMethod('post');
 * $f->setPostData( array('name'=> 'easychen' , 'email' => 'easychen@gmail.com' , 'file' => '文件的二进制内容') );
 * $ret = $f->fetch('http://photo.sinaapp.com/save.php');
 *
 * //抓取失败时输出错误码和错误信息
 * if ($ret === false)
 *         var_dump($f->errno(), $f->errmsg());
 * </code>
 *
 * 错误码参考：
 *  - errno: 0         成功
 *  - errno: 600     fetchurl 服务内部错误
 *  - errno: 601     accesskey 不存在
 *  - errno: 602     认证错误，可能是secretkey错误
 *  - errno: 603     超出fetchurl的使用配额
 *  - errno: 604     REST 协议错误，相关的header不存在或其它错误，建议使用SAE提供的fetch_url函数
 *  - errno: 605     请求的URI格式不合法
 *  - errno: 606     请求的URI，服务器不可达。
 *
 * @author  zhiyong
 * @version $Id$
 * @package sae
 *
 */
class SaeFetchurl extends SaeObject
{

    function __construct($akey = NULL, $skey = NULL)
    {
        if ($akey===NULL) $akey = SAE_ACCESSKEY;

        if ($skey===NULL) $skey = SAE_SECRETKEY;

        $this->impl_ = new FetchUrl($akey, $skey);
        $this->method_ = "get";
        $this->cookies_ = array();
        $this->opt_ = array();
        $this->headers_ = array();
    }

    /**
     * 设置acccesskey和secretkey
     *
     * 使用当前的应用的key时,不需要调用此方法
     *
     * @param string $akey
     * @param string $skey
     * @return void
     * @author zhiyong
     */
    public function setAuth($akey, $skey)
    {
        $this->impl_->setAccesskey($akey);
        $this->impl_->setSecretkey($skey);
    }

    /**
     * @ignore
     */
    public function setAccesskey($akey)
    {
        $this->impl_->setAccesskey($akey);
    }

    /**
     * @ignore
     */
    public function setSecretkey($skey)
    {
        $this->impl_->setSecretkey($skey);
    }

    /**
     * 设置请求的方法(POST/GET/PUT... )
     *
     * @param string $method
     * @return void
     * @author zhiyong
     */
    public function setMethod($method)
    {
        $this->method_ = trim($method);
        $this->opt_['method'] = trim($method);
    }

    /**
     * 设置POST方法的数据
     *
     * @param array|string $post_data 当格式为array时，key为变量名称,value为变量值，使用multipart方式提交。当格式为string时，直接做为post的content提交。与curl_setopt($ch, CURLOPT_POSTFIELDS, $data)中$data的格式相同。
     * @param bool $multipart value是否为二进制数据
     * @return bool
     * @author zhiyong
     */
    public function setPostData($post_data, $multipart = false)
    {
        $this->opt_["post"] = $post_data;
        $this->opt_["multipart"] = $multipart;

        return true;
    }

    /**
     * 在发起的请求中,添加请求头
     *
     * 不可以使用此方法设定的头：
     *  - Content-Length
     *  - Host
     *  - Vary
     *  - Via
     *  - X-Forwarded-For
     *  - FetchUrl
     *  - AccessKey
     *  - TimeStamp
     *  - Signature
     *  - AllowTruncated    //可使用setAllowTrunc方法来进行设定
     *  - ConnectTimeout    //可使用setConnectTimeout方法来进行设定
     *  - SendTimeout        //可使用setSendTimeout方法来进行设定
     *  - ReadTimeout        //可使用setReadTimeout方法来进行设定
     *
     *
     * @param string $name
     * @param string $value
     * @return bool
     * @author zhiyong
     */
    public function setHeader($name, $value)
    {
        $name = trim($name);
        if (!in_array(strtolower($name), FetchUrl::$disabledHeaders))
        {
            $this->headers_[$name] = $value;
            return true;
        }
        else
        {
            trigger_error("Disabled FetchUrl Header:".$name, E_USER_NOTICE);
            return false;
        }
    }

    /**
     * 设置FetchUrl参数
     *
     * 参数列表：
     *  - truncated        布尔        是否截断
     *  - redirect            布尔        是否支持重定向
     *  - username            字符串        http认证用户名
     *  - password            字符串        http认证密码
     *  - useragent        字符串        自定义UA
     *
     * @param string $name
     * @param string $value
     * @return void
     * @author Elmer Zhang
     * @ignore
     */
    public function setOpt($name, $value)
    {
        $name = trim($name);
        $this->opt_[$name] = $value;
    }

    /**
     * 在发起的请求中,批量添加cookie数据
     *
     * @param array $cookies 要添加的Cookies，格式：array('key1' => 'value1', 'key2' => 'value2', ....)
     * @return void
     * @author zhiyong
     */
    public function setCookies($cookies = array())
    {
        if (is_array($cookies) and !empty($cookies))
        {
            foreach ( $cookies as $k => $v )
            {
                $this->setCookie($k, $v);
            }
        }
    }

    /**
     * 在发起的请求中,添加cookie数据,此函数可多次调用,添加多个cookie
     *
     * @param string $name
     * @param string $value
     * @return void
     * @author zhiyong
     */
    public function setCookie($name, $value)
    {
        $name = trim($name);
        array_push($this->cookies_, "$name=$value");
    }

    /**
     * 是否允许截断，默认为不允许
     *
     * 如果设置为true,当发送数据超过允许大小时,自动截取符合大小的部分;<br />
     * 如果设置为false,当发送数据超过允许大小时,直接返回false;
     *
     * @param bool $allow
     * @return void
     * @author zhiyong
     */
    public function setAllowTrunc($allow)
    {
        $this->opt_["truncated"] = $allow;
    }

    /**
     * 设置连接超时时间,此时间必须小于SAE系统设置的时间,否则以SAE系统设置为准（默认为5秒）
     *
     * @param int $ms 毫秒
     * @return void
     * @author zhiyong
     */
    public function setConnectTimeout($ms)
    {
        $this->opt_["connecttimeout"] = $ms;
    }

    /**
     * 设置发送超时时间,此时间必须小于SAE系统设置的时间,否则以SAE系统设置为准（默认为20秒）
     *
     * @param int $ms 毫秒
     * @return void
     * @author zhiyong
     */
    public function setSendTimeout($ms)
    {
        $this->opt_["sendtimeout"] = $ms;
    }

    /**
     * 设置读取超时时间,此时间必须小于SAE系统设置的时间,否则以SAE系统设置为准（默认为60秒）
     *
     * @param int $ms 毫秒
     * @return void
     * @author zhiyong
     */
    public function setReadTimeout($ms)
    {
        $this->opt_["ReadTimeout"] = $ms;
    }

    /**
     * 当请求页面是转向页时,是否允许跳转,SAE最大支持5次跳转(默认不跳转)
     *
     * @param bool $allow 是否允许跳转。true:允许，false:禁止，默认为true
     * @return void
     * @author zhiyong
     */
    public function setAllowRedirect($allow = true)
    {
        $this->opt_["redirect"] = $allow;
    }

    /**
     * 设置HTTP认证用户名密码
     *
     * @param string $username HTTP认证用户名
     * @param string $password HTTP认证密码
     * @return void
     * @author zhiyong
     */
    public function setHttpAuth($username, $password)
    {
        $this->opt_["username"] = $username;
        $this->opt_["password"] = $password;
    }

    /**
     * 发起请求
     *
     * <code>
     * <?php
     * echo "Use callback function\n";
     *
     * function demo($content) {
     *         echo strtoupper($content);
     * }
     *
     * $furl = new SaeFetchurl();
     * $furl->fetch($url, $opt, 'demo');
     *
     * echo "Use callback class\n";
     *
     * class Ctx {
     *      public function demo($content) {
     *                 $this->c .= $content;
     *         }
     *         public $c;
     * };
     *
     * $ctx = new Ctx;
     * $furl = new SaeFetchurl();
     * $furl->fetch($url, $opt, array($ctx, 'demo'));
     * echo $ctx->c;
     * ?>
     * </code>
     *
     * @param string $url
     * @param array $opt 请求参数，格式：array('key1'=>'value1', 'key2'=>'value2', ... )。参数列表：
     *  - truncated        布尔        是否截断
     *  - redirect            布尔        是否支持重定向
     *  - username            字符串        http认证用户名
     *  - password            字符串        http认证密码
     *  - useragent        字符串        自定义UA
     * @param callback $callback 用来处理返回的数据的函数。可以为函数名或某个实例对象的方法。
     * @return string 成功时读取到的内容，否则返回false
     * @author zhiyong
     */
    public function fetch($url, $opt = NULL, $callback = NULL)
    {
        if (count($this->cookies_)!=0)
        {
            $this->opt_["cookie"] = join("; ", $this->cookies_);
        }
        $opt = ($opt)?array_merge($this->opt_, $opt):$this->opt_;
        return $this->impl_->fetch($url, $opt, $this->headers_, $callback);
    }

    /**
     * 返回数据的header信息
     *
     * @param bool $parse 是否解析header，默认为true。
     * @return array
     * @author zhiyong
     */
    public function responseHeaders($parse = true)
    {
        $items = explode("\r\n", $this->impl_->headerContent());
        if (!$parse)
        {
            return $items;
        }
        array_shift($items);
        $headers = array();
        foreach ( $items as $_ )
        {
            $pos = strpos($_, ":");
            $key = trim(substr($_, 0, $pos));
            $value = trim(substr($_, $pos+1));
            if ($key=="Set-Cookie")
            {
                if (array_key_exists($key, $headers))
                {
                    array_push($headers[$key], trim($value));
                }
                else
                {
                    $headers[$key] = array(
                        trim($value)
                    );
                }
            }
            else
            {
                $headers[$key] = trim($value);
            }
        }
        return $headers;
    }

    /**
     * 返回HTTP状态码
     *
     * @return int
     * @author Elmer Zhang
     */
    public function httpCode()
    {
        return $this->impl_->httpCode();
    }

    /**
     * 返回网页内容
     * 常用于fetch()方法返回false时
     *
     * @return string
     * @author Elmer Zhang
     */
    public function body()
    {
        return $this->impl_->body();
    }

    /**
     * 返回头里边的cookie信息
     *
     * @param bool $all 是否返回完整Cookies信息。为true时，返回Cookie的name,value,path,max-age，为false时，只返回Cookies的name, value
     * @return array
     * @author zhiyong
     */
    public function responseCookies($all = true)
    {
        $header = $this->impl_->headerContent();
        $matchs = array();
        $cookies = array();
        $kvs = array();
        if (preg_match_all('/Set-Cookie:\s([^\r\n]+)/i', $header, $matchs))
        {
            foreach ( $matchs[1] as $match )
            {
                $cookie = array();
                $items = explode(";", $match);
                foreach ( $items as $_ )
                {
                    $item = explode("=", trim($_));
                    $cookie[$item[0]] = $item[1];
                }
                array_push($cookies, $cookie);
                $kvs = array_merge($kvs, $cookie);
            }
        }
        if ($all)
        {
            return $cookies;
        }
        else
        {
            unset($kvs['path']);
            unset($kvs['max-age']);
            return $kvs;
        }
    }

    /**
     * 返回错误码
     *
     * @return int
     * @author zhiyong
     */
    public function errno()
    {
        if ($this->impl_->errno()!=0)
        {
            return $this->impl_->errno();
        }
        else
        {
            if ($this->impl_->httpCode()!=200)
            {
                return $this->impl_->httpCode();
            }
        }
        return 0;
    }

    /**
     * 返回错误信息
     *
     * @return string
     * @author zhiyong
     */
    public function errmsg()
    {
        if ($this->impl_->errno()!=0)
        {
            return $this->impl_->error();
        }
        else
        {
            if ($this->impl_->httpCode()!=200)
            {
                return $this->impl_->httpDesc();
            }
        }
        return "";
    }

    /**
     * 将对象的数据重新初始化,用于多次重用一个SaeFetchurl对象
     *
     * @return void
     * @author Elmer Zhang
     */
    public function clean()
    {
        $this->__construct();
    }

    /**
     * 开启/关闭调试模式
     *
     * @param bool $on true：开启调试；false：关闭调试
     * @return void
     * @author Elmer Zhang
     */
    public function debug($on)
    {
        if ($on)
        {
            $this->impl_->setDebugOn();
        }
        else
        {
            $this->impl_->setDebugOff();
        }
    }

    private $impl_;

    private $opt_;

    private $headers_;

}

/**
 * FetchUrl , the sub class of SaeFetchurl
 *
 *
 * @package sae
 * @subpackage fetchurl
 * @author  zhiyong
 * @ignore
 */
class FetchUrl
{
    const end_ = "http://fetchurl.sae.sina.com.cn/";
    const maxRedirect_ = 5;

    public static $disabledHeaders = array(
        'content-length',
        'host',
        'vary',
        'via',
        'x-forwarded-for',
        'fetchurl',
        'accesskey',
        'timestamp',
        'signature',
        'allowtruncated',
        'connecttimeout',
        'sendtimeout',
        'readtimeout'
    );

    public function __construct($accesskey, $secretkey)
    {
        $accesskey = trim($accesskey);
        $secretkey = trim($secretkey);

        $this->accesskey_ = $accesskey;
        $this->secretkey_ = $secretkey;

        $this->errno_ = 0;
        $this->error_ = null;
        $this->debug_ = false;
    }

    public function __destruct()
    {
        // do nothing
    }

    public function setAccesskey($accesskey)
    {
        $accesskey = trim($accesskey);
        $this->accesskey_ = $accesskey;
    }

    public function setSecretkey($secretkey)
    {
        $secretkey = trim($secretkey);
        $this->secretkey_ = $secretkey;
    }

    public function setDebugOn()
    {
        $this->debug_ = true;
    }

    public function setDebugOff()
    {
        $this->debug_ = false;
    }

    public function fetch($url, $opt = null, $headers = null, $callback = null)
    {

        $url = trim($url);
        if (substr($url, 0, 7)!='http://'&&substr($url, 0, 8)!='https://')
        {
            $url = 'http://'.$url;
        }

        $this->callback_ = $callback;

        $maxRedirect = FetchUrl::maxRedirect_;
        if (is_array($opt)&&array_key_exists('redirect', $opt)&&!$opt['redirect'])
        {
            $maxRedirect = 1;
        }

        for ($i = 0; $i<$maxRedirect; ++$i)
        {
            $this->dofetch($url, $opt, $headers);
            if ($this->errno_==0)
            {
                if ($this->httpCode_==301||$this->httpCode_==302)
                {
                    $matchs = array();
                    if (preg_match('/Location:\s([^\r\n]+)/i', $this->header_, $matchs))
                    {
                        $newUrl = $matchs[1];
                        // if new domain
                        if (strncasecmp($newUrl, "http://", strlen("http://"))==0)
                        {
                            $url = $newUrl;
                        }
                        else
                        {
                            $url = preg_replace('/^((?:https?:\/\/)?[^\/]+)\/(.*)$/i', '$1', $url)."/".$newUrl;
                        }

                        if ($this->debug_)
                        {
                            echo "[debug] redirect to $url\n";
                        }
                        continue;
                    }
                }
            }
            break;
        }

        if ($this->errno_==0&&$this->httpCode_==200)
        {
            return $this->body_;
        }
        else
        {
            return false;
        }
    }

    public function headerContent()
    {
        return $this->header_;
    }

    public function errno()
    {
        return $this->errno_;
    }

    public function error()
    {
        return $this->error_;
    }

    public function httpCode()
    {
        return $this->httpCode_;
    }

    public function body()
    {
        return $this->body_;
    }

    public function httpDesc()
    {
        return $this->httpDesc_;
    }

    private function signature($url, $timestamp)
    {
        $content = "FetchUrl".$url."TimeStamp".$timestamp."AccessKey".$this->accesskey_;
        $signature = (base64_encode(hash_hmac('sha256', $content, $this->secretkey_, true)));
        if ($this->debug_)
        {
            echo "[debug] content: $content"."\n";
            echo "[debug] signature: $signature"."\n";
        }
        return $signature;
    }

    // we have to set wirteBody & writeHeader public
    // for we used them in curl_setopt()
    public function writeBody($ch, $body)
    {
        if ($this->callback_)
        {
            call_user_func($this->callback_, $body);
        }
        else
        {
            $this->body_ .= $body;
        }
        if ($this->debug_)
        {
            echo "[debug] body => $body";
        }
        return strlen($body);
    }

    public function writeHeader($ch, $header)
    {
        $this->header_ .= $header;
        if ($this->debug_)
        {
            echo "[debug] header => $header";
        }
        return strlen($header);
    }

    private function dofetch($url, $opt, $headers_)
    {

        $this->header_ = $this->body_ = null;
        $headers = array();

        $ch = curl_init();
        curl_setopt($ch, CURLOPT_URL, $url);
        curl_setopt($ch, CURLOPT_WRITEFUNCTION, array(
            $this,
            'writeBody'
        ));
        curl_setopt($ch, CURLOPT_HEADERFUNCTION, array(
            $this,
            'writeHeader'
        ));
        if ($this->debug_)
        {
            curl_setopt($ch, CURLOPT_VERBOSE, true);
        }

        if (is_array($opt)&&!empty($opt))
        {
            foreach ( $opt as $k => $v )
            {
                switch (strtolower($k))
                {
                    case 'username' :
                        if (array_key_exists("password", $opt))
                        {
                            curl_setopt($ch, CURLOPT_USERPWD, $v.":".$opt["password"]);
                        }
                        break;
                    case 'password' :
                        if (array_key_exists("username", $opt))
                        {
                            curl_setopt($ch, CURLOPT_USERPWD, $opt["username"].":".$v);
                        }
                        break;
                    case 'useragent' :
                        curl_setopt($ch, CURLOPT_USERAGENT, $v);
                        break;
                    case 'post' :
                        curl_setopt($ch, CURLOPT_POSTFIELDS, $v);
                        break;
                    case 'cookie' :
                        curl_setopt($ch, CURLOPT_COOKIESESSION, true);
                        curl_setopt($ch, CURLOPT_COOKIE, $v);
                        break;
                    case 'multipart' :
                        if ($v) array_push($headers, "Content-Type: multipart/form-data");
                        break;
                    case 'truncated' :
                        array_push($headers, "AllowTruncated:".$v);
                        break;
                    case 'connecttimeout' :
                        array_push($headers, "ConnectTimeout:".intval($v));
                        break;
                    case 'sendtimeout' :
                        array_push($headers, "SendTimeout:".intval($v));
                        break;
                    case 'readtimeout' :
                        array_push($headers, "ReadTimeout:".intval($v));
                        break;
                    default :
                        break;

                }
            }
        }

        if (isset($opt['method']))
        {
            if (strtolower($opt['method'])=='get')
            {
                curl_setopt($ch, CURLOPT_HTTPGET, true);
            }
        }

        if (is_array($headers_)&&!empty($headers_))
        {
            foreach ( $headers_ as $k => $v )
            {
                if (!in_array(strtolower($k), FetchUrl::$disabledHeaders))
                {
                    array_push($headers, "{$k}:".$v);
                }
            }
        }

        curl_setopt($ch, CURLOPT_HTTPHEADER, $headers);
        curl_setopt($ch, CURLOPT_ENCODING, "");
        curl_setopt($ch, CURLOPT_HTTP_VERSION, CURL_HTTP_VERSION_1_0);
        curl_setopt($ch, CURLINFO_HEADER_OUT, true);

        curl_exec($ch);
        $info = curl_getinfo($ch);
        if ($this->debug_)
        {
            echo "[debug] curl_getinfo => ".print_r($info, true)."\n";
        }
        $this->errno_ = curl_errno($ch);
        $this->error_ = curl_error($ch);

        if ($this->errno_==0)
        {
            $matchs = array();
            if (preg_match('/^(?:[^\s]+)\s([^\s]+)\s([^\r\n]+)/', $this->header_, $matchs))
            {
                $this->httpCode_ = $matchs[1];
                $this->httpDesc_ = $matchs[2];
                if ($this->debug_)
                {
                    echo "[debug] httpCode = ".$this->httpCode_."  httpDesc = ".$this->httpDesc_."\n";
                }
            }
            else
            {
                $this->errno_ = -1;
                $this->error_ = "invalid response";
            }
        }
        curl_close($ch);
    }

    private $accesskey_;

    private $secretkey_;

    private $errno_;

    private $error_;

    private $httpCode_;

    private $httpDesc_;

    private $header_;

    private $body_;

    private $debug_;

}
